#!/usr/local/bin/python
#-*- encoding:utf-8 -*- 
import codecs 
from whoosh.index import create_in  
from whoosh.fields import *  
from chinesetokenizer import ChineseAnalyzer
from whoosh import qparser
import ConfigParser
from multiprocessing import Pool
from whoosh.collectors import *
from whoosh.index import open_dir
import utils

# Runtime settings are loaded from a file named 'config', resolved against
# the current working directory.  ConfigParser.read() skips files it cannot
# open, so a missing file only shows up later when an option is requested.
config = ConfigParser.ConfigParser()
config.read('config')

def toFinalResults(queryString, queryID, results):
    """Format the outcome of one query as a tab-separated line (no newline).

    queryString -- the query text as read from the log.
    queryID     -- identifier of the query.
    results     -- search results; only the first element is inspected and
                   it must expose ``score`` and ``fields()``.

    With no results the line is ``-1.000000<TAB>queryID<TAB>query`` with the
    query stripped of surrounding whitespace.  Otherwise the line holds the
    first hit's score, the query id, the query text exactly as given, and
    the hit's stored ``appid`` and (stripped) ``content`` encoded as UTF-8.
    """
    if len(results) == 0:
        # -1 is the sentinel score meaning "nothing matched".
        return '%f\t%s\t%s' % (-1, queryID, queryString.strip())

    top = results[0]
    stored = top.fields()
    columns = (
        top.score,
        queryID,
        queryString,
        stored['appid'].encode('utf-8'),
        stored['content'].strip().encode('utf-8'),
    )
    return '%f\t%s\t%s\t%s\t%s' % columns



# Open the existing index whose directory is configured as option
# 'MulFieldsIndex' in section 'bm25', and keep one module-level searcher
# that processChunk() uses for every query.
ix = open_dir(config.get('bm25', 'MulFieldsIndex'))
searcher = ix.searcher()

# Query terms are looked up in both 'content' and 'description' and are
# OR-ed together.  'description' carries a boost of 1e-10 against 1.0 for
# 'content' -- presumably so that it only matters when 'content' scores
# are otherwise equal or absent.
parser = qparser.MultifieldParser(
    ['content', 'description'],
    ix.schema,
    fieldboosts={'content': 1.00, 'description': 1e-10},
    group=qparser.syntax.OrGroup,
)


def processChunk(line):
    """Run one logged query against the index and format its top hit.

    line -- one record of the query log: tab-separated, with the query id
            in the first column and the query text (UTF-8 bytes) in the
            second.  ``None`` is accepted and ignored.

    Returns the newline-terminated line built by ``toFinalResults``, or
    ``None`` when there is nothing to report: ``line`` was ``None``, the
    record had fewer than two columns, or the search raised ``TimeLimit``.
    """
    import sys

    if line is None:
        return None

    columns = line.strip().split('\t')
    if len(columns) < 2:
        # A blank line, or a record whose query column is empty (strip()
        # removes the trailing tab), used to raise IndexError and abort the
        # whole pool.map() batch.  Skip it and say so instead.
        sys.stderr.write('Skipping malformed log line: %r\n' % (line,))
        return None

    queryID, queryString = columns[0], columns[1]
    # Progress trace; the parenthesised form prints the same text under the
    # Python 2 print statement.
    print(queryID)
    query = parser.parse(queryString.decode('utf-8'))
    try:
        # Only the single best hit is needed; the wrapper aborts the search
        # with TimeLimit once the configured limit (10) is exceeded.
        collector = searcher.collector(limit=1)
        timed = TimeLimitCollector(collector, timelimit=10)
        searcher.search_with_collector(query, timed)
        formatted = toFinalResults(queryString, queryID, timed.results())
        return '%s\n' % (formatted,)
    except TimeLimit:
        # Partial results are discarded; the caller counts this query.
        return None

# Driver: feed the formatted search log through processChunk() in parallel
# batches of K lines, write every produced result line to
# 'searchLogResults', and report how many queries produced none.
with open('/home/wangshuxin/app_search/log/format_log', 'r') as f:
    # Nested `with` so the output file is flushed and closed even when a
    # worker raises.
    with open('searchLogResults', 'w') as resultsFile:
        count = 0
        K = 150
        pool = Pool(processes=K)
        try:
            for chunk in utils.grouper(K, f):
                # Materialise the batch so each input line can be paired
                # with its result after pool.map() has consumed it.
                lines = list(chunk)
                results = pool.map(processChunk, lines)
                for line, result in zip(lines, results):
                    if result:
                        resultsFile.write(result)
                    elif line is not None:
                        # processChunk() also returns None for None inputs
                        # (presumably padding added by utils.grouper to the
                        # last batch -- verify); those are not real queries
                        # and must not inflate the failure count.
                        count += 1
        finally:
            # Release the worker processes instead of leaving them to
            # interpreter teardown.
            pool.close()
            pool.join()
        print('Total %d query time exceeds limts' % (count, ))
